---
title: "R Script for Higher Learning, Broader Minds"
output:
  html_document:
    toc: true
    toc_depth: 3
date: "`r Sys.Date()`"
---

# Overview
This R Markdown follows the Verma & Verma (2024) seven-stage SEM workflow. If you are replicating our study, note that our script doest the following:
1. It prioritises weighted `WLSMV` estimation for the ordered-indicator models, with principled fall-backs only if the primary SEM attempt is unusable.
2. It automatically rejects degenerate SEM fits in which the key structural paths and latent `R^2` values collapse to zero.
3. It corrects the item-direction coding item by item so that higher scores consistently indicate greater openness and more pro-immigration attitudes.
4. It keeps the original 5-point ordinal structure of the focal indicators.
5. It removes p-values and significance stars from manuscript-ready outputs.

# Stage 0. Setup
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

required_pkgs <- c(
  "haven", "dplyr", "tidyr", "forcats", "ggplot2", "lavaan", "knitr", "tibble"
)

to_install <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, FUN.VALUE = logical(1), quietly = TRUE)
]

if (length(to_install) > 0) {
  install.packages(to_install)
}

invisible(lapply(required_pkgs, library, character.only = TRUE))

has_semPlot <- requireNamespace("semPlot", quietly = TRUE)
```

# Stage 1. File path and helper functions
```{r}
data_path <- "bsa2023data.dta"

if (!file.exists(data_path)) {
  stop(
    "Could not find the data file: ", data_path,
    "\nPlace this .Rmd file in the same folder as the .dta file, or edit data_path."
  )
}


to_num <- function(x) {
  as.numeric(x)
}

keep_1_5 <- function(x) {
  x <- to_num(x)
  x[!x %in% 1:5] <- NA_real_
  x
}

reverse_1_5 <- function(x) {
  x <- keep_1_5(x)
  ifelse(is.na(x), NA_real_, 6 - x)
}

row_mean_available <- function(data, vars) {
  out <- rowMeans(data[, vars], na.rm = TRUE)
  out[is.nan(out)] <- NA_real_
  out
}

weighted_mean_na <- function(x, w) {
  ok <- !is.na(x) & !is.na(w)
  if (!any(ok)) return(NA_real_)
  stats::weighted.mean(x[ok], w[ok])
}

safe_fit_measures <- function(fit, measures = c("cfi", "tli", "rmsea", "srmr")) {
  out <- stats::setNames(rep(NA_real_, length(measures)), measures)

  for (m in measures) {
    primary_val <- tryCatch(
      as.numeric(lavaan::fitMeasures(fit, m)),
      error = function(e) NA_real_
    )

    if (!is.finite(primary_val) || is.na(primary_val)) {
      primary_val <- tryCatch(
        as.numeric(lavaan::fitMeasures(fit, m, fm.args = list(robust = FALSE))),
        error = function(e) NA_real_
      )
    }

    out[m] <- primary_val
  }

  out
}

safe_option <- function(opt, name, default = NA) {
  value <- opt[[name]]
  if (is.null(value)) default else value
}

safe_nobs_lavaan <- function(fit) {
  candidates <- list(
    tryCatch(lavaan::lavInspect(fit, "nobs"), error = function(e) NULL),
    tryCatch(lavaan::lavInspect(fit, "ntotal"), error = function(e) NULL),
    tryCatch(slot(slot(fit, "SampleStats"), "nobs"), error = function(e) NULL),
    tryCatch(slot(slot(fit, "SampleStats"), "ntotal"), error = function(e) NULL),
    tryCatch(slot(slot(fit, "Data"), "nobs"), error = function(e) NULL)
  )

  for (cand in candidates) {
    if (is.null(cand)) next
    if (is.list(cand)) cand <- unlist(cand, use.names = FALSE)
    cand_num <- suppressWarnings(as.numeric(cand))
    cand_num <- cand_num[is.finite(cand_num)]
    if (length(cand_num) > 0) return(sum(cand_num))
  }

  NA_real_
}

extract_fit <- function(fit, model_name) {
  fm <- safe_fit_measures(fit)
  opt <- tryCatch(lavaan::lavInspect(fit, "options"), error = function(e) list())

  tibble::tibble(
    model = model_name,
    estimator = safe_option(opt, "estimator", NA_character_),
    missing = safe_option(opt, "missing", NA_character_),
    fixed_x = isTRUE(safe_option(opt, "fixed.x", FALSE)),
    conditional_x = isTRUE(safe_option(opt, "conditional.x", FALSE)),
    nobs = safe_nobs_lavaan(fit),
    cfi = unname(fm["cfi"]),
    tli = unname(fm["tli"]),
    rmsea = unname(fm["rmsea"]),
    srmr = unname(fm["srmr"])
  )
}

safe_parameter_estimates <- function(fit, standardized = TRUE) {
  tryCatch(
    lavaan::parameterEstimates(fit, standardized = standardized) %>%
      tibble::as_tibble(),
    error = function(e) tibble::tibble()
  )
}

extract_structural_rows <- function(fit, x_vars = character(0)) {
  pe <- safe_parameter_estimates(fit, standardized = TRUE)

  pe %>%
    dplyr::filter(
      .data$op == "~",
      .data$lhs %in% c("Open", "Att"),
      .data$rhs %in% c(x_vars, "Open")
    ) %>%
    dplyr::mutate(
      est = as.numeric(.data$est),
      std.all = as.numeric(.data$std.all)
    )
}

is_usable_sem_fit <- function(fit, x_vars = character(0), tol = 1e-6) {
  if (!inherits(fit, "lavaan")) return(FALSE)

  converged <- tryCatch(lavaan::lavInspect(fit, "converged"), error = function(e) FALSE)
  if (!isTRUE(converged)) return(FALSE)

  struct_rows <- extract_structural_rows(fit, x_vars = x_vars)
  if (nrow(struct_rows) == 0) return(FALSE)

  nonzero_paths <- sum(is.finite(struct_rows$est) & abs(struct_rows$est) > tol, na.rm = TRUE)

  r2_vals <- tryCatch(unlist(lavaan::inspect(fit, "rsquare")), error = function(e) numeric(0))
  positive_r2 <- if (length(r2_vals) == 0) TRUE else any(is.finite(r2_vals) & r2_vals > tol)

  vcov_ok <- {
    vc <- tryCatch(stats::vcov(fit), error = function(e) NULL)
    !is.null(vc) && all(is.finite(vc))
  }

  fm <- safe_fit_measures(fit)
  any_fit_measure <- any(is.finite(fm), na.rm = TRUE)

  nonzero_paths > 0 && positive_r2 && (vcov_ok || any_fit_measure)
}

sanitize_x_vars <- function(data, x_vars) {
  if (is.null(x_vars) || length(x_vars) == 0) return(character(0))

  x_vars <- intersect(x_vars, names(data))
  if (length(x_vars) == 0) return(character(0))

  keep <- vapply(
    x_vars,
    function(v) {
      x <- suppressWarnings(as.numeric(data[[v]]))
      finite_x <- x[is.finite(x)]
      length(finite_x) > 0 && stats::var(finite_x) > 0
    },
    FUN.VALUE = logical(1)
  )

  x_vars[keep]
}

fit_ordered_model <- function(model, data, ordered_vars, model_fun = c("cfa", "sem"), weights_var = "weight", x_vars = NULL) {
  model_fun <- match.arg(model_fun)
  fit_fn <- switch(
    model_fun,
    cfa = lavaan::cfa,
    sem = lavaan::sem
  )

  x_vars <- sanitize_x_vars(data, x_vars)

  specs <- if (model_fun == "cfa") {
    list(
      list(
        estimator = "WLSMV",
        missing = "pairwise",
        fixed.x = FALSE,
        conditional.x = FALSE,
        meanstructure = TRUE,
        parameterization = "theta",
        sampling.weights = weights_var,
        std.lv = TRUE
      ),
      list(
        estimator = "WLSMV",
        missing = "listwise",
        fixed.x = FALSE,
        conditional.x = FALSE,
        meanstructure = TRUE,
        parameterization = "theta",
        sampling.weights = weights_var,
        std.lv = TRUE
      ),
      list(
        estimator = "WLSMV",
        missing = "pairwise",
        fixed.x = FALSE,
        conditional.x = FALSE,
        meanstructure = TRUE,
        parameterization = "delta",
        sampling.weights = weights_var,
        std.lv = TRUE
      ),
      list(
        estimator = "PML",
        missing = "available.cases",
        fixed.x = FALSE,
        conditional.x = FALSE,
        meanstructure = TRUE,
        parameterization = "theta",
        sampling.weights = weights_var,
        std.lv = TRUE
      ),
      list(
        estimator = "PML",
        missing = "pairwise",
        fixed.x = FALSE,
        conditional.x = FALSE,
        meanstructure = TRUE,
        parameterization = "theta",
        sampling.weights = weights_var,
        std.lv = TRUE
      )
    )
  } else {
    list(
      list(
        estimator = "WLSMV",
        missing = "pairwise",
        fixed.x = TRUE,
        conditional.x = TRUE,
        meanstructure = TRUE,
        parameterization = "theta",
        sampling.weights = weights_var,
        std.lv = FALSE
      ),
      list(
        estimator = "WLSMV",
        missing = "listwise",
        fixed.x = TRUE,
        conditional.x = TRUE,
        meanstructure = TRUE,
        parameterization = "theta",
        sampling.weights = weights_var,
        std.lv = FALSE
      ),
      list(
        estimator = "WLSMV",
        missing = "pairwise",
        fixed.x = TRUE,
        conditional.x = TRUE,
        meanstructure = TRUE,
        parameterization = "delta",
        sampling.weights = weights_var,
        std.lv = FALSE
      ),
      list(
        estimator = "WLSMV",
        missing = "pairwise",
        fixed.x = FALSE,
        conditional.x = FALSE,
        meanstructure = TRUE,
        parameterization = "theta",
        sampling.weights = weights_var,
        std.lv = FALSE
      ),
      list(
        estimator = "PML",
        missing = "available.cases",
        fixed.x = FALSE,
        conditional.x = FALSE,
        meanstructure = TRUE,
        parameterization = "theta",
        sampling.weights = weights_var,
        std.lv = FALSE
      )
    )
  }

  attempt_log <- character(0)

  for (i in seq_along(specs)) {
    spec <- specs[[i]]

    fit_args <- c(
      list(
        model = model,
        data = data,
        ordered = ordered_vars
      ),
      spec
    )

    fit_try <- tryCatch(
      do.call(fit_fn, fit_args),
      error = function(e) e
    )

    attempt_label <- paste0(
      "Attempt ", i,
      ": estimator=", spec$estimator,
      "; missing=", spec$missing,
      "; fixed.x=", spec$fixed.x,
      "; conditional.x=", spec$conditional.x,
      "; parameterization=", spec$parameterization
    )

    if (inherits(fit_try, "lavaan")) {
      if (model_fun == "sem" && !is_usable_sem_fit(fit_try, x_vars = x_vars)) {
        attempt_log <- c(
          attempt_log,
          paste0(
            "REJECTED: ", attempt_label,
            " | converged but yielded a degenerate structural solution (zero/near-zero key paths, zero R-squared, or unusable vcov/fit measures)."
          )
        )
        next
      }

      attr(fit_try, "estimation_note") <- paste0("Successful ", attempt_label)
      attr(fit_try, "attempt_log") <- c(attempt_log, paste0("SUCCESS: ", attempt_label))
      return(fit_try)
    }

    attempt_log <- c(attempt_log, paste0("FAILED: ", attempt_label, " | ", fit_try$message))
  }

  stop(paste(attempt_log, collapse = "\n"))
}

compute_cr_ave <- function(fit, factor_items) {
  std_sol <- tibble::as_tibble(lavaan::standardizedSolution(fit, se = FALSE, zstat = FALSE, pvalue = FALSE, ci = FALSE))

  out <- lapply(names(factor_items), function(fac) {
    items <- factor_items[[fac]]

    loadings <- std_sol %>%
      dplyr::filter(op == "=~", lhs == fac, rhs %in% items) %>%
      dplyr::arrange(match(rhs, items))

    residuals <- std_sol %>%
      dplyr::filter(op == "~~", lhs == rhs, lhs %in% items) %>%
      dplyr::arrange(match(lhs, items))

    lambda <- loadings$est.std
    theta <- residuals$est.std

    cr <- (sum(lambda, na.rm = TRUE)^2) /
      ((sum(lambda, na.rm = TRUE)^2) + sum(theta, na.rm = TRUE))

    ave <- sum(lambda^2, na.rm = TRUE) /
      (sum(lambda^2, na.rm = TRUE) + sum(theta, na.rm = TRUE))

    tibble::tibble(
      Factor = fac,
      Indicators = length(items),
      CR = cr,
      AVE = ave
    )
  })

  dplyr::bind_rows(out)
}

safe_modindices <- function(fit, minimum_value = 10) {
  tryCatch(
    {
      tibble::as_tibble(lavaan::modindices(fit, sort. = TRUE)) %>%
        dplyr::filter(mi >= minimum_value) %>%
        dplyr::select(lhs, op, rhs, mi, epc, sepc.lv, sepc.all)
    },
    error = function(e) {
      tibble::tibble(note = paste("Modification indices unavailable:", e$message))
    }
  )
}
```

# Stage 2. Load data and identify the ISSP analytic frame
```{r}
raw <- haven::read_dta(data_path)

raw_indicator_names <- c(
  "ISSP_Q7_b_slice", "ISSP_Q7_d_slice", "ISSP_Q6_a_slice",
  "ISSP_Q7_a_slice", "ISSP_Q7_c_slice", "ISSP_Q7_e_slice",
  "ISSP_Q8", "ISSP_Q6_d_slice"
)

issp_raw <- raw %>%
  dplyr::transmute(
    QnrVersion_num = to_num(QnrVersion),
    HEdQual2_num = to_num(HEdQual2),
    weight = to_num(BSA23_final_wt),
    across(dplyr::all_of(raw_indicator_names), to_num)
  ) %>%
  dplyr::filter(QnrVersion_num %in% 8:10)

nrow(issp_raw)
```

# Stage 3. Data preparation
```{r}
valid_education <- issp_raw$HEdQual2_num %in% c(1, 2, 3, 4, 6)
has_any_indicator <- rowSums(
  sapply(raw_indicator_names, function(v) issp_raw[[v]] %in% 1:5),
  na.rm = TRUE
) > 0
complete_all_indicators <- Reduce(
  `&`,
  lapply(raw_indicator_names, function(v) issp_raw[[v]] %in% 1:5)
)

sample_flow <- tibble::tribble(
  ~stage, ~n,
  "All cases in archive file", nrow(raw),
  "ISSP versions 8-10", nrow(issp_raw),
  "Valid education recorded", sum(valid_education, na.rm = TRUE),
  "Valid education + at least one focal indicator", sum(valid_education & has_any_indicator, na.rm = TRUE),
  "Complete cases on all focal indicators (comparison only)", sum(valid_education & complete_all_indicators, na.rm = TRUE)
)

knitr::kable(sample_flow, caption = "Sample-flow summary")
```

```{r}
raw_label_map <- c(
  ISSP_Q7_b_slice = "Immigrants are generally good for Britain's economy",
  ISSP_Q7_d_slice = "Immigrants improve British society by bringing new ideas and cultures",
  ISSP_Q6_a_slice = "Britain should limit the import of foreign products to protect its economy",
  ISSP_Q7_a_slice = "Immigrants increase crime rates",
  ISSP_Q7_c_slice = "Immigrants take jobs away from people born in Britain",
  ISSP_Q7_e_slice = "People born in Britain should be given preference over immigrants",
  ISSP_Q8 = "The number of immigrants to Britain should be increased/reduced",
  ISSP_Q6_d_slice = "Foreigners should not be allowed to buy land in Britain"
)

response_audit <- dplyr::bind_rows(lapply(raw_indicator_names, function(v) {
  tibble::tibble(
    raw_variable = v,
    label = raw_label_map[[v]],
    substantive_1_5 = sum(issp_raw[[v]] %in% 1:5, na.rm = TRUE),
    cant_choose_6 = sum(issp_raw[[v]] == 6, na.rm = TRUE),
    dont_know_8 = sum(issp_raw[[v]] == 8, na.rm = TRUE),
    prefer_not_9 = sum(issp_raw[[v]] == 9, na.rm = TRUE),
    negative_or_not_applicable = sum(issp_raw[[v]] < 0, na.rm = TRUE)
  )
}))

knitr::kable(response_audit, caption = "Raw response audit for focal indicators")
```

```{r}
ordered_vars <- c(
  "open_econ", "open_culture", "open_trade",
  "att_crime", "att_jobs", "att_priority", "att_numbers", "att_land"
)

x_vars <- c("edu_below", "edu_alevel", "edu_otherhe", "edu_degree")

analysis_data <- issp_raw %>%
  dplyr::mutate(
    edu_group = dplyr::case_when(
      HEdQual2_num == 1 ~ "Degree or equivalent",
      HEdQual2_num == 2 ~ "Other higher education",
      HEdQual2_num == 3 ~ "A-level / level 3",
      HEdQual2_num %in% c(4, 5) ~ "Below A-level / other qualification",
      HEdQual2_num == 6 ~ "No qualifications",
      TRUE ~ NA_character_
    ),
    edu_group = factor(
      edu_group,
      levels = c(
        "No qualifications",
        "Below A-level / other qualification",
        "A-level / level 3",
        "Other higher education",
        "Degree or equivalent"
      )
    ),

    # Education dummies with "No qualifications" as reference
    edu_below = as.numeric(edu_group == "Below A-level / other qualification"),
    edu_alevel = as.numeric(edu_group == "A-level / level 3"),
    edu_otherhe = as.numeric(edu_group == "Other higher education"),
    edu_degree = as.numeric(edu_group == "Degree or equivalent"),

    # Higher scores always indicate greater openness / more pro-immigration attitudes
    # Raw BSA/ISSP coding is 1 = strong agreement (or "increase a lot" for Q8) and 5 = strong disagreement
    # Positive immigration items therefore need reversing, whereas anti-immigration items already run
    # in the desired pro-immigration direction once restricted to the substantive 1:5 range.
    open_econ = reverse_1_5(ISSP_Q7_b_slice),
    open_culture = reverse_1_5(ISSP_Q7_d_slice),
    open_trade = keep_1_5(ISSP_Q6_a_slice),
    att_crime = keep_1_5(ISSP_Q7_a_slice),
    att_jobs = keep_1_5(ISSP_Q7_c_slice),
    att_priority = keep_1_5(ISSP_Q7_e_slice),
    att_numbers = reverse_1_5(ISSP_Q8),
    att_land = keep_1_5(ISSP_Q6_d_slice)
  ) %>%
  dplyr::filter(!is.na(weight), !is.na(edu_group)) %>%
  dplyr::filter(dplyr::if_any(dplyr::all_of(ordered_vars), ~ !is.na(.x))) %>%
  dplyr::mutate(
    dplyr::across(dplyr::all_of(c(ordered_vars, x_vars, "weight")), as.numeric)
  )

analysis_data$observed_open <- row_mean_available(
  analysis_data,
  c("open_econ", "open_culture", "open_trade")
)

analysis_data$observed_att <- row_mean_available(
  analysis_data,
  c("att_crime", "att_jobs", "att_priority", "att_numbers", "att_land")
)

nrow(analysis_data)
```

# Stage 4. Descriptive stats before SEM estimation
```{r}
education_distribution <- analysis_data %>%
  dplyr::group_by(edu_group) %>%
  dplyr::summarise(
    unweighted_n = dplyr::n(),
    weighted_n = sum(weight, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  dplyr::mutate(weighted_pct = weighted_n / sum(weighted_n))

knitr::kable(
  education_distribution,
  digits = 3,
  caption = "Weighted education distribution in the analytic ISSP sample"
)
```

```{r}
descriptive_profiles <- analysis_data %>%
  dplyr::group_by(edu_group) %>%
  dplyr::summarise(
    weighted_open_mean = weighted_mean_na(observed_open, weight),
    weighted_att_mean = weighted_mean_na(observed_att, weight),
    unweighted_n = dplyr::n(),
    .groups = "drop"
  )

knitr::kable(
  descriptive_profiles,
  digits = 3,
  caption = "Weighted descriptive profiles by education"
)
```

# Stage 5. Measurement-model evaluation

```{r}
one_factor_model <- '
  General =~ open_econ + open_culture + open_trade +
             att_crime + att_jobs + att_priority + att_numbers + att_land
'

two_factor_cfa_model <- '
  Open =~ open_econ + open_culture + open_trade
  Att  =~ att_crime + att_jobs + att_priority + att_numbers + att_land
'

fit_one_factor <- fit_ordered_model(
  model = one_factor_model,
  data = analysis_data,
  ordered_vars = ordered_vars,
  model_fun = "cfa"
)

fit_two_factor <- fit_ordered_model(
  model = two_factor_cfa_model,
  data = analysis_data,
  ordered_vars = ordered_vars,
  model_fun = "cfa"
)

estimation_overview <- tibble::tibble(
  component = c("One-factor CFA", "Two-factor CFA"),
  estimation = c(attr(fit_one_factor, "estimation_note"), attr(fit_two_factor, "estimation_note"))
)

knitr::kable(estimation_overview, caption = "Estimation overview for the measurement models")
```

```{r}
fit_comparison <- dplyr::bind_rows(
  extract_fit(fit_one_factor, "One-factor CFA"),
  extract_fit(fit_two_factor, "Two-factor CFA")
)

knitr::kable(
  fit_comparison,
  digits = 3,
  caption = "Fit comparison for alternative measurement structures"
)
```

```{r}
latent_correlation <- lavaan::parameterEstimates(
  fit_two_factor,
  standardized = TRUE
) %>%
  tibble::as_tibble() %>%
  dplyr::filter(
    op == "~~",
    (lhs == "Open" & rhs == "Att") | (lhs == "Att" & rhs == "Open")
  ) %>%
  dplyr::transmute(
    relationship = "Open ~~ Att",
    estimate = est,
    std_estimate = std.all
  )

knitr::kable(
  latent_correlation,
  digits = 3,
  caption = "Latent correlation in the two-factor CFA"
)
```

```{r}
factor_items <- list(
  Open = c("open_econ", "open_culture", "open_trade"),
  Att = c("att_crime", "att_jobs", "att_priority", "att_numbers", "att_land")
)

reliability_validity <- compute_cr_ave(fit_two_factor, factor_items)

knitr::kable(
  reliability_validity,
  digits = 3,
  caption = "Composite reliability (CR) and average variance extracted (AVE)"
)
```

# Stage 6. Structural model estimation

Education is entered as a set of dummy variables, using **No qualifications** as the reference category.

```{r}
sem_model <- '
  # Measurement model
  Open =~ open_econ + open_culture + open_trade
  Att  =~ att_crime + att_jobs + att_priority + att_numbers + att_land

  # Structural paths
  Open ~ a1*edu_below + a2*edu_alevel + a3*edu_otherhe + a4*edu_degree
  Att  ~ b*Open + c1*edu_below + c2*edu_alevel + c3*edu_otherhe + c4*edu_degree

  # Indirect effects
  ind_below   := a1*b
  ind_alevel  := a2*b
  ind_otherhe := a3*b
  ind_degree  := a4*b

  # Total effects
  total_below   := c1 + (a1*b)
  total_alevel  := c2 + (a2*b)
  total_otherhe := c3 + (a3*b)
  total_degree  := c4 + (a4*b)
'

fit_sem <- fit_ordered_model(
  model = sem_model,
  data = analysis_data,
  ordered_vars = ordered_vars,
  model_fun = "sem",
  x_vars = x_vars
)

attr(fit_sem, "estimation_note")
```

```{r}
sem_fit_summary <- extract_fit(fit_sem, "Primary weighted SEM")

knitr::kable(
  sem_fit_summary,
  digits = 3,
  caption = "Primary SEM fit summary"
)
```

# Stage 7. Reporting tables and diagnostics

```{r}
indicator_map <- c(
  open_econ = "Immigrants are generally good for Britain's economy",
  open_culture = "Immigrants improve British society by bringing new ideas and cultures",
  open_trade = "Britain should limit the import of foreign products to protect its economy",
  att_crime = "Immigrants increase crime rates",
  att_jobs = "Immigrants take jobs away from people born in Britain",
  att_priority = "People born in Britain should be given preference over immigrants",
  att_numbers = "The number of immigrants to Britain should be increased/reduced",
  att_land = "Foreigners should not be allowed to buy land in Britain"
)

factor_map <- c(
  Open = "Cultural openness",
  Att = "Immigration attitudes"
)

pe_sem <- lavaan::parameterEstimates(fit_sem, standardized = TRUE) %>%
  tibble::as_tibble()

table1_loadings <- pe_sem %>%
  dplyr::filter(op == "=~") %>%
  dplyr::transmute(
    Factor = dplyr::recode(lhs, !!!factor_map),
    Indicator = dplyr::recode(rhs, !!!indicator_map),
    Estimate = est,
    Std_Estimate = std.all
  )

knitr::kable(
  table1_loadings,
  digits = 3,
  caption = "Table 1. Factor loadings from the primary SEM"
)
```

```{r}
predictor_map <- c(
  edu_below = "Below A-level / other qualification (vs no qualifications)",
  edu_alevel = "A-level / level 3 (vs no qualifications)",
  edu_otherhe = "Other higher education (vs no qualifications)",
  edu_degree = "Degree or equivalent (vs no qualifications)",
  Open = "Cultural openness"
)

outcome_map <- c(
  Open = "Cultural openness",
  Att = "Immigration attitudes"
)

table2_paths <- pe_sem %>%
  dplyr::filter(op == "~", lhs %in% c("Open", "Att")) %>%
  dplyr::transmute(
    Outcome = dplyr::recode(lhs, !!!outcome_map),
    Predictor = dplyr::recode(rhs, !!!predictor_map),
    Estimate = est,
    Std_Estimate = std.all
  )

knitr::kable(
  table2_paths,
  digits = 3,
  caption = "Table 2. Structural paths from the primary SEM"
)
```

```{r}
effect_map <- c(
  ind_below = "Indirect effect: below A-level / other qualification",
  ind_alevel = "Indirect effect: A-level / level 3",
  ind_otherhe = "Indirect effect: other higher education",
  ind_degree = "Indirect effect: degree or equivalent",
  total_below = "Total effect: below A-level / other qualification",
  total_alevel = "Total effect: A-level / level 3",
  total_otherhe = "Total effect: other higher education",
  total_degree = "Total effect: degree or equivalent"
)

table3_effects <- pe_sem %>%
  dplyr::filter(op == ":=") %>%
  dplyr::transmute(
    Effect = dplyr::recode(lhs, !!!effect_map),
    Estimate = est
  )

knitr::kable(
  table3_effects,
  digits = 3,
  caption = "Table 3. Indirect and total effects from the primary SEM"
)
```

```{r}
modification_indices <- safe_modindices(fit_sem, minimum_value = 10)

knitr::kable(
  modification_indices,
  digits = 3,
  caption = "Theory-led diagnostic table: modification indices >= 10"
)
```

# Plots

```{r}
education_profile_att <- analysis_data %>%
  dplyr::group_by(edu_group) %>%
  dplyr::summarise(
    weighted_mean = weighted_mean_na(observed_att, weight),
    unweighted_n = dplyr::n(),
    .groups = "drop"
  )

plot_att <- ggplot2::ggplot(
  education_profile_att,
  ggplot2::aes(x = edu_group, y = weighted_mean, group = 1)
) +
  ggplot2::geom_point(size = 3) +
  ggplot2::geom_line() +
  ggplot2::labs(
    x = "Education",
    y = "Weighted mean pro-immigration attitude score",
    title = "Weighted mean pro-immigration attitudes by education"
  ) +
  ggplot2::theme_minimal()

plot_att

ggplot2::ggsave(
  filename = "weighted_mean_immigration_attitudes_by_education.png",
  plot = plot_att,
  width = 7,
  height = 4,
  dpi = 300
)
```

```{r}
education_profile_open <- analysis_data %>%
  dplyr::group_by(edu_group) %>%
  dplyr::summarise(
    weighted_mean = weighted_mean_na(observed_open, weight),
    unweighted_n = dplyr::n(),
    .groups = "drop"
  )

plot_open <- ggplot2::ggplot(
  education_profile_open,
  ggplot2::aes(x = edu_group, y = weighted_mean, group = 1)
) +
  ggplot2::geom_point(size = 3) +
  ggplot2::geom_line() +
  ggplot2::labs(
    x = "Education",
    y = "Weighted mean openness score",
    title = "Weighted mean openness by education"
  ) +
  ggplot2::theme_minimal()

plot_open

ggplot2::ggsave(
  filename = "weighted_mean_openness_by_education.png",
  plot = plot_open,
  width = 7,
  height = 4,
  dpi = 300
)
```

# SEM path diagram

```{r fig.path.diagram, fig.cap="SmartPLS-style SEM path diagram for the weighted ESR model. Blue circles show endogenous latent constructs with R-squared values; yellow rectangles show indicators and education groups. Direct education -> immigration-attitudes control paths are retained in the SEM but omitted from the figure for visual clarity.", out.width="100%"}
format_std_label <- function(x) {
  if (length(x) == 0 || is.na(x) || !is.finite(x)) return("")
  sprintf("%.3f", as.numeric(x))
}

get_std_from_pe <- function(lhs_name, op_name, rhs_name, pe_tbl = pe_sem) {
  idx <- which(pe_tbl$lhs == lhs_name & pe_tbl$op == op_name & pe_tbl$rhs == rhs_name)
  if (length(idx) == 0) return(NA_real_)
  as.numeric(pe_tbl$std.all[idx[1]])
}

get_r2_from_fit <- function(fit, vars = c("Open", "Att")) {
  out <- stats::setNames(rep(NA_real_, length(vars)), vars)
  r2_raw <- tryCatch(lavaan::inspect(fit, "rsquare"), error = function(e) NULL)
  if (!is.null(r2_raw)) {
    r2_raw <- unlist(r2_raw)
    common <- intersect(names(r2_raw), vars)
    out[common] <- as.numeric(r2_raw[common])
  }
  out
}

render_smartpls_style_sem <- function(fit, pe_tbl, file = "sem_path_diagram.png") {
  if (file.exists(file)) unlink(file)

  r2_vals <- get_r2_from_fit(fit)

  node_df <- data.frame(
    name = c(
      "edu_below", "edu_alevel", "edu_otherhe", "edu_degree",
      "Open", "Att",
      "open_econ", "open_culture", "open_trade",
      "att_crime", "att_jobs", "att_priority", "att_numbers", "att_land"
    ),
    x = c(
      0.12, 0.12, 0.12, 0.12,
      0.38, 0.69,
      0.26, 0.38, 0.50,
      0.90, 0.90, 0.90, 0.90, 0.90
    ),
    y = c(
      0.76, 0.64, 0.52, 0.40,
      0.58, 0.58,
      0.88, 0.88, 0.88,
      0.76, 0.64, 0.52, 0.40, 0.28
    ),
    type = c(
      rep("rect", 4), "circle", "circle",
      rep("rect", 3), rep("rect", 5)
    ),
    label = c(
      "Below\nA-level", "A-level", "Other\nHE", "Degree",
      "Cultural\nopenness", "Immigration\nattitudes",
      "Economy", "Culture", "Trade",
      "Crime", "Jobs", "Priority", "Numbers", "Land"
    ),
    w = c(
      0.14, 0.14, 0.14, 0.14,
      NA, NA,
      0.13, 0.13, 0.13,
      0.13, 0.13, 0.13, 0.13, 0.13
    ),
    h = c(
      0.065, 0.065, 0.065, 0.065,
      NA, NA,
      0.055, 0.055, 0.055,
      0.055, 0.055, 0.055, 0.055, 0.055
    ),
    r = c(
      NA, NA, NA, NA,
      0.075, 0.075,
      NA, NA, NA, NA, NA, NA, NA, NA
    ),
    stringsAsFactors = FALSE
  )

  node_fill <- c(rect = "#FFF200", circle = "#1976D2")
  node_border <- c(rect = "#C6BC00", circle = "#1976D2")

  get_node <- function(name) {
    node_df[node_df$name == name, , drop = FALSE]
  }

  boundary_point <- function(node, target_x, target_y) {
    dx <- target_x - node$x
    dy <- target_y - node$y
    d <- sqrt(dx^2 + dy^2)
    if (d == 0) return(c(x = node$x, y = node$y))

    ux <- dx / d
    uy <- dy / d

    if (node$type == "circle") {
      return(c(x = node$x + node$r * ux, y = node$y + node$r * uy))
    }

    tx <- if (abs(ux) < 1e-8) Inf else (node$w / 2) / abs(ux)
    ty <- if (abs(uy) < 1e-8) Inf else (node$h / 2) / abs(uy)
    t <- min(tx, ty)

    c(x = node$x + ux * t, y = node$y + uy * t)
  }

  edge_list <- list(
    list(from = "edu_below",   to = "Open", coef = get_std_from_pe("Open", "~",  "edu_below", pe_tbl),   lwd = 1.6, label_dx = -0.020, label_dy = 0.018, label_cex = 0.76),
    list(from = "edu_alevel",  to = "Open", coef = get_std_from_pe("Open", "~",  "edu_alevel", pe_tbl),  lwd = 1.6, label_dx = -0.020, label_dy = 0.014, label_cex = 0.76),
    list(from = "edu_otherhe", to = "Open", coef = get_std_from_pe("Open", "~",  "edu_otherhe", pe_tbl), lwd = 1.6, label_dx = -0.020, label_dy = 0.010, label_cex = 0.76),
    list(from = "edu_degree",  to = "Open", coef = get_std_from_pe("Open", "~",  "edu_degree", pe_tbl),  lwd = 1.6, label_dx = -0.020, label_dy = 0.006, label_cex = 0.76),

    list(from = "Open", to = "Att", coef = get_std_from_pe("Att", "~", "Open", pe_tbl), lwd = 2.0, label_dx = 0.000, label_dy = 0.028, label_cex = 0.86),

    list(from = "Open", to = "open_econ",    coef = get_std_from_pe("Open", "=~", "open_econ", pe_tbl),    lwd = 1.4, label_dx = -0.020, label_dy = 0.018, label_cex = 0.78),
    list(from = "Open", to = "open_culture", coef = get_std_from_pe("Open", "=~", "open_culture", pe_tbl), lwd = 1.4, label_dx =  0.000, label_dy = 0.020, label_cex = 0.78),
    list(from = "Open", to = "open_trade",   coef = get_std_from_pe("Open", "=~", "open_trade", pe_tbl),   lwd = 1.4, label_dx =  0.020, label_dy = 0.018, label_cex = 0.78),

    list(from = "Att", to = "att_crime",    coef = get_std_from_pe("Att", "=~", "att_crime", pe_tbl),    lwd = 1.4, label_dx = -0.008, label_dy = 0.018, label_cex = 0.78),
    list(from = "Att", to = "att_jobs",     coef = get_std_from_pe("Att", "=~", "att_jobs", pe_tbl),     lwd = 1.4, label_dx = -0.004, label_dy = 0.018, label_cex = 0.78),
    list(from = "Att", to = "att_priority", coef = get_std_from_pe("Att", "=~", "att_priority", pe_tbl), lwd = 1.4, label_dx = -0.002, label_dy = 0.016, label_cex = 0.78),
    list(from = "Att", to = "att_numbers",  coef = get_std_from_pe("Att", "=~", "att_numbers", pe_tbl),  lwd = 1.4, label_dx = -0.002, label_dy = 0.016, label_cex = 0.78),
    list(from = "Att", to = "att_land",     coef = get_std_from_pe("Att", "=~", "att_land", pe_tbl),     lwd = 1.4, label_dx = -0.002, label_dy = 0.016, label_cex = 0.78)
  )

  grDevices::png(
    filename = file,
    width = 3200,
    height = 1800,
    res = 300,
    bg = "white"
  )

  old_par <- graphics::par(no.readonly = TRUE)
  on.exit({
    graphics::par(old_par)
    grDevices::dev.off()
  }, add = TRUE)

  graphics::par(mar = c(0.3, 0.3, 0.3, 0.3), xpd = NA)
  graphics::plot.new()
  graphics::plot.window(xlim = c(0, 1), ylim = c(0, 1))

  label_positions <- vector("list", length(edge_list))

  for (i in seq_along(edge_list)) {
    e <- edge_list[[i]]
    from_node <- get_node(e$from)
    to_node <- get_node(e$to)

    p1 <- boundary_point(from_node, to_node$x, to_node$y)
    p2 <- boundary_point(to_node, from_node$x, from_node$y)

    graphics::arrows(
      x0 = p1["x"], y0 = p1["y"], x1 = p2["x"], y1 = p2["y"],
      length = 0.08,
      angle = 20,
      code = 2,
      col = "black",
      lwd = e$lwd
    )

    if (is.finite(e$coef)) {
      lx <- p1["x"] + 0.56 * (p2["x"] - p1["x"]) + e$label_dx
      ly <- p1["y"] + 0.56 * (p2["y"] - p1["y"]) + e$label_dy
      label_positions[[i]] <- list(
        x = lx,
        y = ly,
        label = format_std_label(e$coef),
        cex = e$label_cex
      )
    }
  }

  for (i in seq_len(nrow(node_df))) {
    node <- node_df[i, ]

    if (node$type == "rect") {
      graphics::rect(
        xleft = node$x - node$w / 2,
        ybottom = node$y - node$h / 2,
        xright = node$x + node$w / 2,
        ytop = node$y + node$h / 2,
        col = node_fill[[node$type]],
        border = node_border[[node$type]],
        lwd = 1.4
      )
      graphics::text(node$x, node$y, labels = node$label, cex = 0.84, family = "sans")
    } else {
      graphics::symbols(
        x = node$x,
        y = node$y,
        circles = node$r,
        inches = FALSE,
        add = TRUE,
        bg = node_fill[[node$type]],
        fg = node_border[[node$type]],
        lwd = 1.8
      )
      graphics::text(
        x = node$x,
        y = node$y + 0.002,
        labels = if (is.na(r2_vals[[node$name]])) "" else sprintf("%.3f", r2_vals[[node$name]]),
        col = "white",
        font = 2,
        cex = 0.95,
        family = "sans"
      )
      graphics::text(
        x = node$x,
        y = node$y - node$r - 0.060,
        labels = node$label,
        col = "black",
        font = 2,
        cex = 0.95,
        family = "sans"
      )
    }
  }

  for (lab in label_positions) {
    if (is.null(lab)) next
    graphics::text(
      x = lab$x,
      y = lab$y,
      labels = lab$label,
      cex = lab$cex,
      family = "sans"
    )
  }

  invisible(file)
}

path_diagram_status <- tryCatch(
  {
    render_smartpls_style_sem(fit_sem, pe_sem, "sem_path_diagram.png")
    "Path diagram rendered successfully."
  },
  error = function(e) {
    paste("Path diagram rendering failed:", e$message)
  }
)

path_diagram_status

if (file.exists("sem_path_diagram.png")) {
  knitr::include_graphics("sem_path_diagram.png")
} else {
  cat(path_diagram_status)
}
```

# Export manuscript tables and logs
```{r}
write.csv(sample_flow, "sample_flow_bsa2023_sem.csv", row.names = FALSE)
write.csv(response_audit, "response_audit_bsa2023_sem.csv", row.names = FALSE)
write.csv(education_distribution, "education_distribution_bsa2023_sem.csv", row.names = FALSE)
write.csv(descriptive_profiles, "descriptive_profiles_bsa2023_sem.csv", row.names = FALSE)
write.csv(estimation_overview, "estimation_overview_measurement_models.csv", row.names = FALSE)
write.csv(fit_comparison, "model_fit_comparison_bsa2023_sem.csv", row.names = FALSE)
write.csv(latent_correlation, "latent_correlation_bsa2023_sem.csv", row.names = FALSE)
write.csv(reliability_validity, "reliability_validity_bsa2023_sem.csv", row.names = FALSE)
write.csv(sem_fit_summary, "sem_fit_summary_bsa2023_sem.csv", row.names = FALSE)
write.csv(table1_loadings, "table1_factor_loadings_bsa2023_sem.csv", row.names = FALSE)
write.csv(table2_paths, "table2_structural_paths_bsa2023_sem.csv", row.names = FALSE)
write.csv(table3_effects, "table3_indirect_total_effects_bsa2023_sem.csv", row.names = FALSE)
write.csv(modification_indices, "modification_indices_bsa2023_sem.csv", row.names = FALSE)
write.csv(education_profile_att, "weighted_profile_attitudes_bsa2023_sem.csv", row.names = FALSE)
write.csv(education_profile_open, "weighted_profile_openness_bsa2023_sem.csv", row.names = FALSE)

writeLines(attr(fit_one_factor, "attempt_log"), "estimation_log_one_factor.txt")
writeLines(attr(fit_two_factor, "attempt_log"), "estimation_log_two_factor.txt")
writeLines(attr(fit_sem, "attempt_log"), "estimation_log_sem.txt")
```

# Session information
```{r}
sessionInfo()
```
